% --- Figures S1/S5: compare model input and output PDFs against data ---
% NOTE(review): this fragment is the interior of a plotting function;
% figS1, figS5, PCC, XI, Xm, WI and outputFolder are defined outside the
% visible code, and hold/nexttile calls may have been stripped -- confirm
% against the full source.
XBinsLog = logspace(-1,2,100); % logarithmic bins, 0.1 to 100 mV/m
resize_figure(figS1,2*55,2*55);
% Data vs model-input distributions as stair-style PDFs on log-log axes
histogram(PCC,XBinsLog,'Normalization','pdf','DisplayStyle','stairs','LineWidth',1,'EdgeColor','k');
histogram(XI,XBinsLog,'Normalization','pdf','DisplayStyle','stairs','LineWidth',1,'EdgeColor','m');
set(gca,'XScale','log','YScale','log');
legend('Data: $pdf(E_{PC})$','Model input: $pdf( \hat{X} )$','Interpreter','latex')
xlabel('$E_{PC} \ , \ \hat{X}$ [mV/m]');
title('Comparing model input to data');
% Same comparison for the model output (figure S5)
resize_figure(figS5,2*55,2*55);
histogram(Xm,XBinsLog,'Normalization','pdf','DisplayStyle','stairs','LineWidth',1,'EdgeColor','k');
histogram(WI,XBinsLog,'Normalization','pdf','DisplayStyle','stairs','LineWidth',1,'EdgeColor','m');
set(gca,'XScale','log','YScale','log');
legend('Data: $ pdf(\hat{E}^*_m) $ ','Model: $ pdf(\hat{X}^*) $','Interpreter','latex');
xlabel('$ \hat{E}^*_m \ , \ \hat{X}^{*} $ [mV/m]');
title('Comparing model output to data');
% Export Figure S1 as high-resolution PNG and PDF
export_fig([outputFolder,'FigureS1.png'], ...
'-r600','-png','-nocrop',figS1);
export_fig([outputFolder,'FigureS1.pdf'], ...
'-r600','-pdf','-nocrop',figS1);
end
% NOTE(review): the lines below are MATLAB console output (an export_fig
% warning) accidentally pasted into the source; commented out so parsing
% is not broken. Remove once confirmed against the full source.
% Warning: export_fig currently supports transparent patches/areas only in PNG output.
% To export transparent patches/areas to PDF, use the print command:
% print(gcf, '-dpdf', 'G:\My Drive\Research\Projects\Paper 6\Draft\Version 4\NatCom\Figures\FigureS1.pdf');
% --- Figure 4: error statistics of model vs data, 2x2 tiled panels ---
% NOTE(review): nexttile/hold calls appear to have been stripped between
% the plot pairs; each title/xlabel pair marks one panel. Data curves are
% black, model curves magenta throughout.
resize_figure(fig4,2*120,2*120);
t=tiledlayout(2,2,"TileSpacing","compact");
% Panel: standard deviation of the error
plot(EeXIgWIs.XBins,EeXIgWIs.stdYgX,'k');
ylabel('$\Sigma(\hat{E}^*_m-PCC)$');
plot(EeXIgWI.XBins,EeXIgWI.stdYgX,'m');
ylabel('$ \Sigma(\hat{X}^*-\hat{X}) $');
title('Std. deviation of error','Spread of the error distribution','Interpreter',"none");
xlabel('$\hat{E}^*_m \ , \ \hat{X}^*$ [mV/m]');
% Panel: std of the error normalized by the bin value [%]
plot(EeXIgWIs.XBins,100.*EeXIgWIs.stdYgX./EeXIgWIs.XBins,'k');
ylabel('$\Sigma(\hat{E}^*_m-PCC) / PCC$ [\%]');
plot(EeXIgWI.XBins,100.*EeXIgWI.stdYgX./EeXIgWI.XBins,'m');
ylabel('$\Sigma(\hat{X}^*-\hat{X}) / \hat{X} $ [\%]');
title('Std. deviation of normalized error','Spread of the normalized error distribution','Interpreter',"none");
xlabel('$\hat{E}^*_m \ , \ \hat{X}^*$ [mV/m]');
% Panel: normalized bias (mean error / bin value) [%]
plot(EeXIgWIs.XBins,100.*EeXIgWIs.YgX./EeXIgWIs.XBins,'k');
ylabel('$mean(\hat{E}^*_m-PCC) / PCC$ [\%]');
plot(EeXIgWI.XBins,100.*EeXIgWI.YgX./EeXIgWI.XBins,'m');
ylabel('$mean(\hat{X}^*-\hat{X}) / \hat{X} $ [\%]');
title('Mean of normalized error','Normalized Bias','Interpreter',"none");
xlabel('$\hat{E}^*_m \ , \ \hat{X}^*$ [mV/m]');
% Panel: raw bias (mean error) [mV/m]
plot(EeXIgWIs.XBins,EeXIgWIs.YgX,'k');
ylabel('$mean(\hat{E}^*_m-PCC)$ [mV/m]');
plot(EeXIgWI.XBins,EeXIgWI.YgX,'m');
ylabel('$mean(\hat{X}^*-\hat{X})$ [mV/m]');
title('Mean of error','Bias','Interpreter',"none");
xlabel('$\hat{E}^*_m \ , \ \hat{X}^*$ [mV/m]');
% --- Figure S9: statistical origin of the non-linear bias (1x2 tiles) ---
resize_figure(figS9,2*55,2*120);
t=tiledlayout(1,2,"TileSpacing","tight");
title(t,'Explaining the statistical origin of the non-linear bias','Interpreter','none','FontSize',16);
% Left tile: stacked histogram of model input. x positions are log10 of
% the bin edges, so the axis stays 'linear' and tick labels are set by hand.
bar(log10(edges1),value1','BarLayout','stacked','BarWidth',1,'EdgeColor','none');
set(gca,'XScale','linear','YScale','log',...
'XTick',log10([0.1,1,4,10,50]),'XTickLabel',{'0.1','1','4','10','50'});
xlabel('$\hat{X}$ [mV/m]');
ylabel('$pdf(\hat{X})$ [Count Density]');
% Right tile: model-output histogram, stacked by originating input segment
bar(log10(edges2),value2','BarLayout','stacked','BarWidth',1,'EdgeColor','none');
% BUG FIX: the original set(...) call ended in a dangling ', ...' line
% continuation, leaving the call unterminated (syntax error). Closed it
% here. NOTE(review): if further name/value pairs were lost in extraction,
% restore them from the full source.
set(gca,'XScale','linear','YScale','log',...
    'XTick',log10([0.1,1,4,10,50]),'XTickLabel',{'0.1','1','4','10','50'});
xlabel('$\hat{X}^*$ [mV/m]');
title({'Model output: proportion of','corresponding inputs in color'});
cb=colorbar_thin('YLabel','$\hat{X}$'); % thin colorbar keyed to input magnitude (project helper)
% NOTE(review): console output pasted into the source; commented out.
% Warning: Unable to set 'Position', 'InnerPosition', 'OuterPosition', or 'ActivePositionProperty' for objects in a TiledChartLayout
% Color axis spans the input bins in log10 space; tick labels in mV/m
caxis(log10([edges1(1)-0.01,edges1(end)]));
cb.Ticks=log10([0.1, 1, 4, 10, 50]);
cb.TickLabels=([0.1, 1, 4, 10, 50]);
% Export Figure S9 as high-resolution PNG and PDF
export_fig([outputFolder,'FigureS9.png'], ...
'-r600','-png','-nocrop',figS9);
export_fig([outputFolder,'FigureS9.pdf'], ...
'-r600','-pdf','-nocrop',figS9);
% --- Figure M3: linearity revealed after correcting measurement error ---
% NOTE(review): nexttile/hold calls appear stripped; the generic ylabel
% '$<Y|\hat{X}^*>$' in each panel is later overwritten by the descriptive one.
resize_figure(figM3,2*55,2*120);
t=tiledlayout(1,2,"TileSpacing","compact");
title(t,'Correcting error in solar wind measurement reveals:','Interpreter','none')
% Panel A: polar cap potential response
p1 = plot(XBins,XBins,'k'); % true linear (1:1) reference
p2 = plot_curve(EPCCgWC,'m'); % calibrated data (per legend below)
ylabel('$<Y|\hat{X}^*>$');
p3 = plot_curve(EPCCgW,[0 0.5 0]); % erroneous data, dark green (per legend below)
set(gca,'YColor',[0 0 0],'YTick',[0,10,20,30,40]);
xlabel('$\hat{E}^*_m \ , \ \hat{E}^c_m $[mV/m]','Interpreter','latex');
ylabel('Polar cap potential $E_{PC}$ [mV/m]','Interpreter','latex');
legend([p3,p2,p1],{'Erroneous Data: $<E_{PC}|\hat{E}^*_m>$ ','Calibrated Data: $<E_{PC}|\hat{E}^c_m>$ ','True linear relationship'},'Location','best','Interpreter','latex');
title('A) Linearity in polar cap potential response','Interpreter',"none");
% Panel B: auroral electrojet (SML) response
p1 = plot_curve(EYgX,'k');
p2 = plot_curve(EYsgEmC,'m');
ylabel('$<Y|\hat{X}^*>$');
p3 = plot_curve(EYsgWs,[0 0.5 0]);
set(gca,'YColor',[0 0 0],'YTick',[-4000,-3000,-2000,-1000,0]);
xlabel('$\hat{E}^*_m \ , \ \hat{E}^c_m$ [mV/m]','Interpreter','latex');
ylabel('Auroral electrojet strength $SML$ [nT]','Interpreter','latex');
legend([p3,p2,p1],{'Erroneous Data: $<SML|\hat{E}^*_m>$ ','Calibrated Data: $<SML|\hat{E}^c_m>$ ','True linear relationship'},'Location','best','Interpreter','latex');
title('B) Linearity in auroral current response','Interpreter',"none");
% Export Figure M3 as high-resolution PNG and PDF
export_fig([outputFolder,'FigureM3.png'], ...
'-r600','-png','-nocrop',figM3);
export_fig([outputFolder,'FigureM3.pdf'], ...
'-r600','-pdf','-nocrop',figM3);
end
% NOTE(review): console output (export_fig warning) pasted into the
% source; commented out so parsing is not broken.
% Warning: export_fig currently supports transparent patches/areas only in PNG output.
% To export transparent patches/areas to PDF, use the print command:
% print(gcf, '-dpdf', 'G:\My Drive\Research\Projects\Paper 6\Draft\Version 4\NatCom\Figures\FigureM3.pdf');
% --- Load datasets: SuperMag SML, WIND solar wind, polar cap index ---
superMag = extract_superMag(superMagFile);
Fsml = griddedInterpolant(datenum(superMag.datetime),superMag.SML,'linear','none'); % SML(t) interpolant, no extrapolation
% Extract WIND measurements, especially Em values
wind = extract_set_of_data(windDataFolder,'wind');
% Extract polar cap index
pci = extract_set_of_data_PCI(polarCapDataFolder);
% Flag fill values (>=999) as missing before averaging
pci.PCN(pci.PCN>=999)=nan;
pci.PCS(pci.PCS>=999)=nan;
% BUG FIX: the original averaged 'temp.PCN + temp.PCS', but 'temp' is
% undefined here -- the struct holding PCN/PCS is 'pci'.
pci.PCC = 0.5.*(pci.PCN + pci.PCS); % Calculating PCC from PCN and PCS
Fpcc = griddedInterpolant(datenum(pci.datetime),pci.PCC,'linear','none'); % PCC(t) interpolant
%Extracting dataset when both PCI and Wind measurements are available
[~,piI,wiI]=intersect(pci.datetime,wind.datetime); % Simultaneous time from WIND, GEOTAIL and PCI
% NOTE(review): 'wind2' is not defined in the visible code -- presumably a
% subset of 'wind' at indices wiI built on a stripped line; confirm.
Em = wind2.E_kl.*10^-3; % How is EKL calculated is important
time = wind2.datetime; % Common time
PCC = Fpcc(datenum(time)); % PCC sampled at the common times
% Finding ACF of X, from data; %Explain how ACF is calculated in reference
% BUG FIX: the three spline fits below referenced undefined variables
% (lag_E, lag_PCC, lag_var). The lag vectors returned by find_correlation
% are lage/lage1/lage2; they are truncated to the same 2^12 lags kept for
% the ACF estimates so the fit inputs have matching lengths.
[Me, nSample, nEnsemble] = split_series(wind.E_kl.*10^-3,2^13);
[RArraye,lage] = find_correlation(Me-nanmean(Me,1),nSample,nEnsemble,1);
acf_E = mean(RArraye(:,1:2^12)./RArraye(:,1)); % ensemble-mean ACF normalized by zero lag
acf_fit_E = fit(lage(1:2^12),acf_E','spline'); % Fitting the ACF with a spline!
[Me1, nSample1, nEnsemble1] = split_series(pci.PCC,2^13);
[RArraye1,lage1] = find_correlation(Me1-nanmean(Me1,1),nSample1,nEnsemble1,1);
acf_PCC = mean(RArraye1(:,1:2^12)./RArraye1(:,1));
acf_fit_PCC = fit(lage1(1:2^12),acf_PCC','spline'); % Fitting the ACF with a spline!
% Calculating ACF of Error
% NOTE(review): 'error' is a workspace variable defined on a stripped line
% (it shadows the builtin error function); confirm its definition.
[Me2, nSample2, nEnsemble2] = split_series(error,2^13);
[RArraye2,lage2] = find_correlation(Me2-nanmean(Me2,1),nSample2,nEnsemble2,1);
acf_var = mean(RArraye2(:,1:2^12)./RArraye2(:,1));
acf_fit_var = fit(lage2(1:2^12),acf_var','spline'); % Fitting the ACF with a spline!
normalizationStr = 'countdensity';
% Pre-allocate: one histogram row per input segment
value = zeros(length(XSegments),length(XBins)-1);
for i=1:1:length(XSegments)
% NOTE(review): this loop body is incomplete as extracted -- the 'if i==1'
% branch head, the final 'else'/'elseif' head and the closing 'end's are
% missing, so the 'elseif' below is orphaned. Restore from the full source.
[value(i,:),edges] = histcounts(Y(X<=XSegments(i)),XBins,'Normalization',normalizationStr);
edges = edges(2:end) - (edges(2)-edges(1))/2; % bin centers from edges
elseif i>1 && i < length(XSegments)
value(i,:) = histcounts(Y(X>XSegments(i-1) & X<=XSegments(i)),XBins,'Normalization',normalizationStr);
value(i,:) = histcounts(Y(X>=XSegments(i-1)),XBins,'Normalization',normalizationStr);
function p = plot_curve(curve, color)
% Plot a conditional-mean curve with a shaded confidence band.
% curve: struct with fields XBins, YgX, CI (as built by create_curve)
% color: MATLAB color spec for both the line and the band
% Returns p, the line handle (the band patch handle is not returned).
CI1 = interp_nans(curve.CI); % fill NaN gaps so the band is continuous (project helper)
p=plot(curve.XBins, curve.YgX, 'Color', color);
plot_ci(curve.XBins,CI1,color,0.2); % translucent band, alpha 0.2
function plot_ci(x,ci,color,alpha)
% Shade a confidence band: fill the polygon between ci(:,1) and ci(:,2).
% x: x-coordinates (vector); ci: n-by-2 [lower, upper] bounds;
% color: color spec; alpha: face transparency.
% BUG FIX: X2 was used but never defined in the visible code; the fill
% polygon traverses x forward (lower bound) then backward (upper bound).
X2 = [x(:)', fliplr(x(:)')];
inBetween = [ci(:,1)', fliplr(ci(:,2)')];
fill(X2,inBetween,color,'LineStyle','none','FaceAlpha',alpha);
function p = plot_2D_error(Y,X,P,yLabel)
% Pseudocolor plot of a 2-D distribution/error surface with a thin colorbar.
% NOTE(review): the statement creating handle p (e.g. pcolor/surf of
% Y,X,P) is missing from the extracted source; restore before use.
set(p,'EdgeColor','none');
colorbar_thin('YLabel',yLabel); % project helper
function curve = create_curve(X, Y, Ei)
% Bin-wise conditional statistics of Y given X over bin edges Ei:
% mean, std, sample count, SEM and two-sided 95% t-confidence interval.
% NOTE(review): the 'for i = ...' loop head over bins (and any
% preallocation) is missing from the extracted source. X1/Y1 are computed
% but unused in the visible code -- the loop relies on nan-tolerant
% statistics instead; confirm intent.
X1 = X(~isnan(X) & ~isnan(Y));
Y1 = Y(~isnan(X) & ~isnan(Y));
[xindx, E] = discretize(X(:),Ei); % bin index of every sample
curve.YgX(i) = nanmean(Y(xindx==i)); % conditional mean <Y|X in bin i>
curve.stdYgX(i) = nanstd(Y(xindx==i)); % conditional spread
curve.NSamples(i) = sum(xindx==i & ~isnan(Y));
curve.SEM(i) = nanstd(Y(xindx==i))./sqrt(curve.NSamples(i)); % standard error of the mean
curve.ts(i,:) = tinv([0.025 0.975],curve.NSamples(i)-1); % 95% t multipliers
curve.CI(i,:) = curve.YgX(i) + curve.ts(i,:)*curve.SEM(i);
curve.XBins(i) = 0.5*(E(i)+E(i+1)); % bin center
function [fXgY,XX,YY] = conditional_pdf(X, Y, gridx, gridy)
% Conditional pdf f(X|Y) on a grid: joint pdf from kde2d divided by the
% marginal f(Y) from ksdensity.
% NOTE(review): X1/Y1 (NaN-filtered copies) are computed but the kde2d and
% ksdensity calls use the raw X/Y, and 'sz' (grid size) is undefined in
% the visible code -- both look like extraction losses; confirm.
X1 = X(~isnan(X) & ~isnan(Y));
Y1 = Y(~isnan(X) & ~isnan(Y));
[bandwidth,fXY,XX,YY]=kde2d([X,Y],sz,[min(gridx),min(gridy)],[max(gridx),max(gridy)]);
[fY, YY1] = ksdensity(Y,YY(:,1)); % marginal density of Y on the grid rows
fXgY = fXY./repmat(fY,1,sz); % f(X|Y) = f(X,Y)/f(Y)
function [M, nSample, nEnsemble]= split_series(series,sampleSize,MaxNoOfMissingValues)
% Split a 1-D series into an ensemble of equal-length segments (rows),
% flagging segments with too many missing values.
% NOTE(review): the next line unconditionally overwrites the
% MaxNoOfMissingValues argument -- an 'if nargin < 3' guard was likely
% lost in extraction.
MaxNoOfMissingValues=1000;
% Pad so length is a multiple of sampleSize. NOTE(review): padarray pads
% with zeros by default; zero padding may bias later correlation estimates
% -- confirm NaN padding was not intended.
series = padarray(series,sampleSize-mod(length(series),sampleSize),'post');
M0 = reshape(series,sampleSize,[])'; % one segment per row
indx=sum(isnan(M0),2)>MaxNoOfMissingValues; % rows with too many NaNs
% NOTE(review): the statements assigning the outputs M, nSample and
% nEnsemble are missing from the extracted source.
function [RArray,lag]=find_correlation(M,nSample,nEnsemble,sampleTime)
% Per-ensemble autocorrelation of the rows of M via xcorr ('unbiased').
% M: nEnsemble-by-nSample matrix (one realization per row)
% sampleTime: sampling interval used to scale the lag axis
% Returns RArray (one correlation sequence per row) and the lag vector.
% BUG FIX: the lag axis was scaled by an undefined variable 'dt'; the
% sampling-interval parameter is named sampleTime.
lag = fftshift(-nSample:1:nSample-1)'.*sampleTime;
xCell = mat2cell(M, ones(1,nEnsemble),nSample); % one cell per ensemble row
[RArray] = cellfun(@(x) xcorr(x,'unbiased'),xCell,'UniformOutput',false);
RArray = cell2mat(RArray);
% Shift right one column: xcorr returns 2*nSample-1 lags; this pads to
% 2*nSample so the ifftshift below aligns zero lag with column 1.
RArray(:,2:end+1) = RArray(:,1:end);
% NOTE(review): ifftshift on a matrix shifts along BOTH dimensions; the
% row shuffle is harmless for later row-averaging, but confirm
% ifftshift(RArray,2) was not intended.
RArray = ifftshift(RArray);
function T1 = extract_data(loadFile)
% Extract solar wind data from OMNI High Resolution space-craft specific files
% Builds a structure of solar wind parameters with OMNI fill values
% replaced by NaN, plus derived fields E_kl (Kan-Lee) and E_sw.
% NOTE(review): the declared output is T1 but the visible code only builds
% T -- the final assignment (e.g. T1 = T or a table conversion) appears to
% have been lost in extraction; confirm against the full source.
format = '%4f %4f %3f %3f %4f %4f %4.1f %6f %6.2f %6.2f %6.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %7f %6.2f %8.2f %8.2f %4f %8.1f %8.1f %8.1f %8.1f %7.2f %9.0f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %8.2f %7f %7f';
tempData = load_ascii_files(loadFile, format, 0);
% Columns: year, day-of-year, hour, minute -> datetime (month fixed to 1)
T.datetime = datetime(tempData{1},1,tempData{2},tempData{3},tempData{4},zeros(size(tempData{4})));
T.timeshift = tempData{8}; T.timeshift(T.timeshift==999999) = nan; % OMNI fill value
T.BxGSM = tempData{13}; T.BxGSM(T.BxGSM > 9999) = nan;
T.ByGSM = tempData{16}; T.ByGSM(T.ByGSM > 9999) = nan;
T.BzGSM = tempData{17}; T.BzGSM(T.BzGSM > 9999) = nan;
T.velocity = tempData{23}; T.velocity(T.velocity > 99999) = nan;
T.B_T = (T.ByGSM.^2 + T.BzGSM.^2).^0.5; % transverse IMF magnitude
% T.theta_kl = wrapTo2Pi(atan2(T.B_T,T.BzGSM));
T.theta_kl = wrapTo2Pi(atan2(T.ByGSM,T.BzGSM)); % IMF clock angle from By,Bz (GSM)
T.E_kl = T.velocity.*T.B_T.*(sin(T.theta_kl/2)).^2; % Kan-Lee merging electric field
T.E_sw = T.velocity.*T.BzGSM;
function T1 = extract_superMag(loadFile)
% Extract auroral electrojet strength data from SuperMag database
% NOTE(review): T.SML is never assigned from tempData in the visible code
% (expect T.SML = tempData{7} or tempData{8}), and the output T1 is never
% assigned from T -- lines likely lost in extraction; confirm.
format = '%4f %2f %2f %2f %2f %2f %7.6f %7.6f';
tempData = load_ascii_files(loadFile, format, 105); % 105 header lines in SuperMag export
T.datetime = datetime(tempData{1},tempData{2},tempData{3},tempData{4},tempData{5},tempData{6});
T.SML(T.SML==999999)=nan; % SuperMag fill value
function T = extract_set_of_data(loadFolder,satellite)
% Load and concatenate all per-file datasets for one satellite.
% NOTE(review): the loop body that merges each T1 into the output T (and
% the closing 'end') is missing from the extracted source.
fileStr = get_files_in_folder(loadFolder,[satellite,'_min_b*.txt']); % project helper
for i=1:1:length(fileStr)
T1 = extract_data([loadFolder,fileStr{i}]);
function T1 = extract_PCI(loadFile)
% Extracting Polar Cap Index values from the PC Index data files
% NOTE(review): assignments of T.PCN/T.PCS from tempData{6}/{7} and the
% final output assignment to T1 are missing from the extracted source.
format = '%4f-%2f-%2f %2f:%2f %6.2f %6.2f';
tempData = load_ascii_files(loadFile, format, 1); % 1 header line
T.datetime = datetime(tempData{1},tempData{2},tempData{3},tempData{4},tempData{5},zeros(size(tempData{4})));
function T = extract_set_of_data_PCI(loadFolder)
% Load and concatenate all PC-index files in a folder.
% NOTE(review): the loop body that merges each T1 into the output T (and
% the closing 'end') is missing from the extracted source.
fileStr = get_files_in_folder(loadFolder,['pcnpcs*.txt']); % project helper
for i=1:1:length(fileStr)
T1 = extract_PCI([loadFolder,fileStr{i}]);
function data=load_ascii_files(loadFile, format, headerlines)
% Read whitespace-delimited ASCII data with textscan.
% loadFile: file path; format: textscan format string;
% headerlines: number of header lines to skip.
% Returns the cell array of columns produced by textscan.
fileID = fopen(loadFile,'r');
% FIX: fail loudly on a bad path instead of letting textscan choke on -1
if fileID == -1
    error('load_ascii_files:openFailed','Could not open file: %s',loadFile);
end
data = textscan(fileID, format, 'headerlines', headerlines);
% FIX: close the handle -- the visible original leaked it
fclose(fileID);
function y = MvLogNRand( Mu , Sigma , Simulations , CorrMat )
%MVLOGNRAND Multivariate lognormal random numbers with correlation.
%   Mu:          the lognormal parameter Mu (column or row vector)
%   Sigma:       the lognormal parameter Sigma (column or row vector)
%   Simulations: the number of simulations to run (scalar)
%   CorrMat:     OPTIONAL square matrix, rows/columns equal to numel(Mu);
%                ones on the diagonal, off-diagonal cells equal to the
%                correlation of the marginal lognormal distributions.
%                Defaults to the identity (zero correlation).
%   To check the simulation, corrcoef(y) should reproduce CorrMat.
%   REQUIRES THE STATISTICS TOOLBOX (mvnrnd)
%   Example: CorrMat = [1 .2 .4 ; .2 1 .5 ; .4 .5 1];
%            y = MvLogNRand( Mu , Sigma , Simulations , CorrMat );
%   Reference: Aggregation of Correlated Risk Portfolios: Models and
%   Algorithms; Shaun S. Wang, PhD. Casualty Actuarial Society
%   Proceedings Volume LXXXV, www.casact.org
%   Author: Stephen Lienhard
% FIX: restored the nargin guard structure -- as extracted, the first
% error() call ran unconditionally so the function could never succeed.
if nargin < 3
    error('Must have at least 3 input arguements')
end
if numel(Simulations) ~= 1 || Simulations < 0
    error('The number of simulations must be greater then zero and a scalar')
end
if nargin == 3
    CorrMat = eye(numel(Mu)); % default: uncorrelated marginals
elseif size(CorrMat,1) ~= size(CorrMat,2)
    error('The correlation matrix must have the same number of rows as columns')
end
if numel(Mu) ~= numel(Sigma)
    error('Mu and Sigma must have the same number of elements')
end
% Calculate the covariance structure of the underlying normal
sigma_down = repmat( Sigma' , numel(Sigma), 1 );
sigma_acrs = repmat( Sigma , 1 , numel(Sigma) );
covv = log( CorrMat .* sqrt(exp(sigma_down.^2)-1) .* ...
    sqrt(exp(sigma_acrs.^2)-1) + 1 );
% Draw correlated normals, then exponentiate to obtain lognormals
y = exp( mvnrnd( Mu , covv , Simulations ));
function y = MvNRand( Mu , Sigma , Simulations , CorrMat )
%MVNRAND Multivariate normal random numbers with correlation.
%   Variant of MvLogNRand (Stephen Lienhard, MATLAB File Exchange) that
%   returns the correlated normal draws WITHOUT exponentiation; the
%   covariance construction is kept identical to the lognormal original.
%   Mu / Sigma:  marginal parameters (vectors of equal length)
%   Simulations: number of rows to draw (scalar)
%   CorrMat:     OPTIONAL correlation matrix; defaults to the identity.
%   REQUIRES THE STATISTICS TOOLBOX (mvnrnd)
% FIX: restored the nargin guard structure -- as extracted, the first
% error() call ran unconditionally so the function could never succeed.
if nargin < 3
    error('Must have at least 3 input arguements')
end
if numel(Simulations) ~= 1 || Simulations < 0
    error('The number of simulations must be greater then zero and a scalar')
end
if nargin == 3
    CorrMat = eye(numel(Mu)); % default: uncorrelated marginals
elseif size(CorrMat,1) ~= size(CorrMat,2)
    error('The correlation matrix must have the same number of rows as columns')
end
if numel(Mu) ~= numel(Sigma)
    error('Mu and Sigma must have the same number of elements')
end
% Calculate the covariance structure (lognormal-style mapping retained
% from the original source)
sigma_down = repmat( Sigma' , numel(Sigma), 1 );
sigma_acrs = repmat( Sigma , 1 , numel(Sigma) );
covv = log( CorrMat .* sqrt(exp(sigma_down.^2)-1) .* ...
    sqrt(exp(sigma_acrs.^2)-1) + 1 );
y = mvnrnd( Mu , covv , Simulations );
function y = MvtRand( Mu , Nu , Simulations , CorrMat )
%MVTRAND Multivariate Student-t random numbers with correlation, shifted by Mu.
%   Variant of MvLogNRand (Stephen Lienhard, MATLAB File Exchange).
%   Mu:          location vector added to each draw
%   Nu:          degrees of freedom passed to mvtrnd
%   Simulations: number of rows to draw (scalar)
%   CorrMat:     OPTIONAL correlation matrix; defaults to the identity.
%   REQUIRES THE STATISTICS TOOLBOX (mvtrnd)
% FIX: restored the nargin guard structure -- as extracted, the first
% error() call ran unconditionally so the function could never succeed.
if nargin < 3
    error('Must have at least 3 input arguements')
end
if numel(Simulations) ~= 1 || Simulations < 0
    error('The number of simulations must be greater then zero and a scalar')
end
if nargin == 3
    CorrMat = eye(numel(Mu)); % default: uncorrelated marginals
elseif size(CorrMat,1) ~= size(CorrMat,2)
    error('The correlation matrix must have the same number of rows as columns')
end
% Covariance construction is not needed for mvtrnd (disabled in the
% original source; retained here for reference):
% sigma_down = repmat( Sigma' , numel(Sigma), 1 );
% sigma_acrs = repmat( Sigma , 1 , numel(Sigma) );
% covv = log( CorrMat .* sqrt(exp(sigma_down.^2)-1) .* ...
%     sqrt(exp(sigma_acrs.^2)-1) + 1 );
y = Mu' + mvtrnd(CorrMat, Nu, Simulations);
function [bandwidth,density,X,Y]=kde2d(data,n,MIN_XY,MAX_XY)
% fast and accurate state-of-the-art
% bivariate kernel density estimator
% with diagonal bandwidth matrix.
% The kernel is assumed to be Gaussian.
% The two bandwidth parameters are
% chosen optimally without ever
% using/assuming a parametric model for the data or any "rules of thumb".
% Unlike many other procedures, this one
% is immune to accuracy failures in the estimation of
% multimodal densities with widely separated modes (see examples).
% INPUTS: data - an N by 2 array with continuous data
% n - size of the n by n grid over which the density is computed
% n has to be a power of 2, otherwise n=2^ceil(log2(n));
% the default value is 2^8;
% MIN_XY,MAX_XY- limits of the bounding box over which the density is computed;
% MIN_XY=[lower_Xlim,lower_Ylim]
% MAX_XY=[upper_Xlim,upper_Ylim].
% The default limits are computed as:
%     MAX=max(data,[],1); MIN=min(data,[],1); Range=MAX-MIN;
%     MAX_XY=MAX+Range/2; MIN_XY=MIN-Range/2;  (matches the code below)
% OUTPUT: bandwidth - a row vector with the two optimal
%          bandwidths for a bivariate Gaussian kernel;
% bandwidth=[bandwidth_X, bandwidth_Y];
% density - an n by n matrix containing the density values over the n by n grid;
% density is not computed unless the function is asked for such an output;
% X,Y - the meshgrid over which the variable "density" has been computed;
% the intended usage is as follows:
% Example (simple Gaussian mixture)
% % generate a Gaussian mixture with distant modes
% randn(500,1)+3.5, randn(500,1);];
% [bandwidth,density,X,Y]=kde2d(data);
% % plot the data and the density estimate
% contour3(X,Y,density,50), hold on
% plot(data(:,1),data(:,2),'r.','MarkerSize',5)
% Example (Gaussian mixture with distant modes):
% % generate a Gaussian mixture with distant modes
% data=[randn(100,1), randn(100,1)/4;
% randn(100,1)+18, randn(100,1);
% randn(100,1)+15, randn(100,1)/2-18;];
% [bandwidth,density,X,Y]=kde2d(data);
% % plot the data and the density estimate
% surf(X,Y,density,'LineStyle','none'), view([0,60])
% colormap hot, hold on, alpha(.8)
% set(gca, 'color', 'blue');
% plot(data(:,1),data(:,2),'w.','MarkerSize',5)
% Example (Sinusoidal density):
% X=rand(1000,1); Y=sin(X*10*pi)+randn(size(X))/3; data=[X,Y];
% [bandwidth,density,X,Y]=kde2d(data);
% % plot the data and the density estimate
% surf(X,Y,density,'LineStyle','none'), view([0,70])
% colormap hot, hold on, alpha(.8)
% set(gca, 'color', 'blue');
% plot(data(:,1),data(:,2),'w.','MarkerSize',5)
% Kernel density estimation via diffusion
% Z. I. Botev, J. F. Grotowski, and D. P. Kroese (2010)
% Annals of Statistics, Volume 38, Number 5, pages 2916-2957.
n=2^ceil(log2(n)); % round up n to the next power of 2;
% Default bounding box: data range expanded by half the range on each side
MAX=max(data,[],1); MIN=min(data,[],1); Range=MAX-MIN;
MAX_XY=MAX+Range/2; MIN_XY=MIN-Range/2;
% NOTE(review): this error should be conditional on size(data,2)~=2; the
% guard (and the nargin handling that makes n/MIN_XY/MAX_XY optional) is
% missing from the extracted source.
error('data has to be an N by 2 array where each row represents a two dimensional observation')
% Scale data into the unit square. NOTE(review): N and scaling are not
% defined in the visible code (expect N=size(data,1); scaling=MAX_XY-MIN_XY).
transformed_data=(data-repmat(MIN_XY,N,1))./repmat(scaling,N,1);
%bin the data uniformly using regular grid;
initial_data=ndhist(transformed_data,n);
% discrete cosine transform of initial data
% NOTE(review): the call a=dct2d(initial_data) appears to be missing here;
% 'a' is used below.
% now compute the optimal bandwidth^2
t_star=root(@(t)(t-evolve(t)),N); % fixed point of the plug-in rule
p_02=func([0,2],t_star);p_20=func([2,0],t_star); p_11=func([1,1],t_star);
% Per-axis squared bandwidths from the functionals (Botev et al. 2010)
t_y=(p_02^(3/4)/(4*pi*N*p_20^(3/4)*(p_11+sqrt(p_20*p_02))))^(1/3);
t_x=(p_20^(3/4)/(4*pi*N*p_02^(3/4)*(p_11+sqrt(p_20*p_02))))^(1/3);
% smooth the discrete cosine transform of initial data using t_star
a_t=exp(-(0:n-1)'.^2*pi^2*t_x/2)*exp(-(0:n-1).^2*pi^2*t_y/2).*a;
% now apply the inverse discrete cosine transform
density=idct2d(a_t)*(numel(a_t)/prod(scaling));
density(density<0)=eps; % remove any negative density values
[X,Y]=meshgrid(MIN_XY(1):scaling(1)/(n-1):MAX_XY(1),MIN_XY(2):scaling(2)/(n-1):MAX_XY(2));
% Convert squared unit-square bandwidths back to data units
bandwidth=sqrt([t_x,t_y]).*scaling;
%#######################################
function [out,time]=evolve(t)
% One step of the fixed-point iteration for the optimal bandwidth t.
% NOTE(review): relies on N and func() from the enclosing kde2d scope;
% the surrounding structure (and 'out' assignment) is missing from the
% extracted source.
Sum_func = func([0,2],t) + func([2,0],t) + 2*func([1,1],t);
time=(2*pi*N*Sum_func)^(-1/3);
%#######################################
% NOTE(review): the two statements below belong to the 'func' helper whose
% function header is missing from the extracted source.
Sum_func=func([s(1)+1,s(2)],t)+func([s(1),s(2)+1],t); const=(1+1/2^(sum(s)+1))/3;
time=(-2*const*K(s(1))*K(s(2))/N/Sum_func)^(1/(2+sum(s)));
%#######################################
% NOTE(review): fragment of the 'psi' functional helper -- header missing.
w=exp(-I*pi^2*Time).*[1,.5*ones(1,length(I)-1)];
out=(-1)^sum(s)*(wy*A2*wx')*pi^(2*sum(s));
%#######################################
% K(s): normal-kernel coefficient for the plug-in functional (header missing).
out=(-1)^s*prod((1:2:2*s-1))/sqrt(2*pi);
function data=dct2d(data)
% computes the 2 dimensional discrete cosine transform of data
% (data must be a square n-by-n array).
% FIX: as extracted, the 'not a square array' error() ran unconditionally
% and the nested-function 'end's were missing; both restored to match the
% published kde2d implementation (Botev et al.). Also 1i replaces the
% shadowable variable name i for the imaginary unit.
[nrows,ncols]= size(data);
if nrows ~= ncols
    error('data is not a square array!')
end
% Compute weights to multiply DFT coefficients
w = [1;2*(exp(-1i*(1:nrows-1)*pi/(2*nrows))).'];
weight=w(:,ones(1,ncols));
% 2-D DCT = 1-D DCT applied to columns, then to rows
data=dct1d(dct1d(data)')';
    function transform1d=dct1d(x)
        % Re-order the elements of the columns of x
        x = [ x(1:2:end,:); x(end:-2:2,:) ];
        % Multiply FFT by weights:
        transform1d = real(weight.* fft(x));
    end
end
%#######################################
function data = idct2d(data)
% computes the 2 dimensional inverse discrete cosine transform
% FIX: as extracted, the nested idct1d helper had lost its function header
% (its body lines were orphaned); restored per the published kde2d code
% (Botev et al.). Also 1i replaces the shadowable name i.
[nrows,ncols]=size(data);
% Compute weights
w = exp(1i*(0:nrows-1)*pi/(2*nrows)).';
weights=w(:,ones(1,ncols));
% 2-D inverse DCT = 1-D inverse DCT on columns, then on rows
data=idct1d(idct1d(data)');
    function out=idct1d(x)
        y = real(ifft(weights.*x));
        % Undo the even/odd re-ordering used by the forward transform
        out = zeros(nrows,ncols);
        out(1:2:nrows,:) = y(1:nrows/2,:);
        out(2:2:nrows,:) = y(nrows:-1:nrows/2+1,:);
    end
end
%#######################################
function binned_data=ndhist(data,M)
% this function computes the histogram
% of an n-dimensional data set;
% 'data' is nrows by n columns
% M is the number of bins used in each dimension
% so that 'binned_data' is a hypercube with
% size length equal to M;
% NOTE(review): assumes data is already scaled into [0,1] (histc edges
% span 0:1/M:1); the 'for i=1:ncols' loop head and bins preallocation are
% missing from the extracted source.
[nrows,ncols]=size(data);
[dum,bins(:,i)] = histc(data(:,i),[0:1/M:1],1);
bins(:,i) = min(bins(:,i),M); % fold the top-edge bin back into bin M
% Combine the vectors of 1D bin counts into a grid of nD bin
% counts, normalized by the number of rows (rows with any zero bin dropped)
binned_data = accumarray(bins(all(bins>0,2),:),1/nrows,M(ones(1,ncols)));
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
% try to find smallest root whenever there is more than one
% NOTE(review): fragment of the 'root' helper from kde2d -- the function
% header, the bracketing loop and the f(t) evaluations are missing from
% the extracted source.
N=50*(N<=50)+1050*(N>=1050)+N*((N<1050)&(N>50)); % clamp N into [50,1050]
tol=10^-12+0.01*(N-50)/1000; % tolerance grows with N
tol=min(tol*2,.1); % double search interval
if tol==.1 % if all else fails
t=fminbnd(@(x)abs(f(x)),0,.1); flag=1;